import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Load the country-level COVID-19 snapshot and report its (rows, columns) size.
df = pd.read_csv(filepath_or_buffer="country_wise_latest.csv")
df.shape
(187, 15)
# Preview the first five rows of the loaded table.
df.head(n=5)
| Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Deaths / 100 Cases | Recovered / 100 Cases | Deaths / 100 Recovered | Confirmed last week | 1 week change | 1 week % increase | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 36263 | 1269 | 25198 | 9796 | 106 | 10 | 18 | 3.50 | 69.49 | 5.04 | 35526 | 737 | 2.07 | Eastern Mediterranean |
| 1 | Albania | 4880 | 144 | 2745 | 1991 | 117 | 6 | 63 | 2.95 | 56.25 | 5.25 | 4171 | 709 | 17.00 | Europe |
| 2 | Algeria | 27973 | 1163 | 18837 | 7973 | 616 | 8 | 749 | 4.16 | 67.34 | 6.17 | 23691 | 4282 | 18.07 | Africa |
| 3 | Andorra | 907 | 52 | 803 | 52 | 10 | 0 | 0 | 5.73 | 88.53 | 6.48 | 884 | 23 | 2.60 | Europe |
| 4 | Angola | 950 | 41 | 242 | 667 | 18 | 1 | 0 | 4.32 | 25.47 | 16.94 | 749 | 201 | 26.84 | Africa |
# Count missing values in every column (isna is the same check as isnull).
df.isna().sum()
Country/Region 0 Confirmed 0 Deaths 0 Recovered 0 Active 0 New cases 0 New deaths 0 New recovered 0 Deaths / 100 Cases 0 Recovered / 100 Cases 0 Deaths / 100 Recovered 0 Confirmed last week 0 1 week change 0 1 week % increase 0 WHO Region 0 dtype: int64
# Show the column labels of the loaded table.
df.columns
Index(['Country/Region', 'Confirmed', 'Deaths', 'Recovered', 'Active',
'New cases', 'New deaths', 'New recovered', 'Deaths / 100 Cases',
'Recovered / 100 Cases', 'Deaths / 100 Recovered',
'Confirmed last week', '1 week change', '1 week % increase',
'WHO Region'],
dtype='object')
# Bar chart: total confirmed cases per WHO region, largest region first.
confirmed_by_region = (
    df.groupby('WHO Region')['Confirmed']
    .sum()
    .sort_values(ascending=False)
)

plt.figure(figsize=(10, 5))
confirmed_by_region.plot(kind='bar')
# Title previously read "confiremd"; corrected spelling.
plt.title("Total confirmed cases by region")
plt.xlabel('Region')
plt.ylabel('Total confirmed cases')
# Horizontal grid lines only, so bar heights are easier to read off.
plt.grid(axis='y')
plt.xticks(rotation=50)
plt.show()
# Bar chart: the ten countries with the most active cases.
plt.figure(figsize=(10, 5))
active_by_country = df.groupby('Country/Region')['Active'].sum()
top_active = active_by_country.sort_values(ascending=False).head(10)
top_active.plot.bar()
plt.title("Top 10 countries with highest active cases")
plt.xlabel('Countries')
plt.ylabel('Active')
plt.grid(True)
plt.xticks(rotation=50)
plt.show()
# Heatmap of the pairwise correlations between the four case-count columns.
plt.figure(figsize=(10, 5))
case_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
correlation = df[case_columns].corr()
plt.imshow(correlation, cmap='coolwarm', interpolation='nearest')
plt.colorbar()
# One tick per column, labelled with the column name on both axes.
tick_positions = range(len(correlation.columns))
plt.xticks(tick_positions, correlation.columns, rotation=45)
plt.yticks(tick_positions, correlation.columns)
plt.show()
# Derive per-country rates as a percentage of confirmed cases
# (divide first, then scale, to keep the arithmetic order unchanged).
df['fatality_rate'] = df['Deaths'].div(df['Confirmed']).mul(100)
df['recovery_rate'] = df['Recovered'].div(df['Confirmed']).mul(100)

# Scatter plot of the two derived rates, one point per row of df.
plt.figure(figsize=(10, 5))
rate_axes = sns.scatterplot(data=df, x='fatality_rate', y='recovery_rate')
rate_axes.set_title("fatality_rate vs recovery_rate ")
rate_axes.set_xlabel('fatality')
rate_axes.set_ylabel('recovery')
rate_axes.grid(True)
plt.xticks()
plt.show()
# Pie chart: share of new cases among the ten countries reporting the most.
top_new_cases = (
    df.groupby('Country/Region')['New cases']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

plt.figure(figsize=(15, 5))
# Percentage labels match the formatting used by the deaths pie chart.
top_new_cases.plot(kind='pie', autopct='%0.2f%%')
plt.title("Top 10 countries with highest new cases")
# A pie has no x/y axes: the grid, tick rotation and axis labels carried over
# from the bar charts are dropped, and the automatic y-label (the Series name)
# is cleared because the title already states what is plotted.
plt.ylabel('')
plt.show()
# Scatter plot of total deaths against total recoveries, one point per row.
plt.figure(figsize=(10, 5))
deaths_axes = sns.scatterplot(data=df, x='Deaths', y='Recovered')
deaths_axes.set(title="Deaths vs Recovered ", xlabel='Deaths', ylabel='Recovered')
deaths_axes.grid(True)
plt.xticks()
plt.show()
# Build the top-10-by-deaths Series here so this line also works when the file
# is run top to bottom (df6 was otherwise only assigned further down).
df6 = df.groupby('Country/Region')['Deaths'].sum().sort_values(ascending=False).head(10)
df6.index
Index(['US', 'Brazil', 'United Kingdom', 'Mexico', 'Italy', 'India', 'France',
'Spain', 'Peru', 'Iran'],
dtype='object', name='Country/Region')
# Pie chart: share of deaths among the ten countries with the most deaths.
plt.figure(figsize=(15, 5))
df6 = (
    df.groupby('Country/Region')['Deaths']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
pie_chart = plt.pie(df6, labels=df6.index, autopct='%0.2f%%', startangle=140)
# The title now names the plotted quantity (deaths); the previous wording
# ("maximum covid reporting") did not say what the slices represent.
plt.title("Top 10 countries by total deaths")
# Grid and tick calls are omitted: a pie chart has no axes for them to affect.
plt.show()
# Interactive scatter of deaths against confirmed cases, coloured by WHO region.
px.scatter(data_frame=df, x='Confirmed', y='Deaths', color='WHO Region')
# Preview the first five rows of df again.
df.head(n=5)
| Country/Region | Confirmed | Deaths | Recovered | Active | New cases | New deaths | New recovered | Deaths / 100 Cases | Recovered / 100 Cases | Deaths / 100 Recovered | Confirmed last week | 1 week change | 1 week % increase | WHO Region | fatality_rate | recovery_rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 36263 | 1269 | 25198 | 9796 | 106 | 10 | 18 | 3.50 | 69.49 | 5.04 | 35526 | 737 | 2.07 | Eastern Mediterranean | 3.499435 | 69.486805 |
| 1 | Albania | 4880 | 144 | 2745 | 1991 | 117 | 6 | 63 | 2.95 | 56.25 | 5.25 | 4171 | 709 | 17.00 | Europe | 2.950820 | 56.250000 |
| 2 | Algeria | 27973 | 1163 | 18837 | 7973 | 616 | 8 | 749 | 4.16 | 67.34 | 6.17 | 23691 | 4282 | 18.07 | Africa | 4.157581 | 67.339935 |
| 3 | Andorra | 907 | 52 | 803 | 52 | 10 | 0 | 0 | 5.73 | 88.53 | 6.48 | 884 | 23 | 2.60 | Europe | 5.733186 | 88.533627 |
| 4 | Angola | 950 | 41 | 242 | 667 | 18 | 1 | 0 | 4.32 | 25.47 | 16.94 | 749 | 201 | 26.84 | Africa | 4.315789 | 25.473684 |
# Four countries with the highest "Deaths / 100 Cases" value.
deaths_per_100 = df.groupby('Country/Region')['Deaths / 100 Cases'].sum()
df9 = deaths_per_100.sort_values(ascending=False).head(4)
# One bar colour per selected country.
colors = ['green', 'red', 'violet', 'gold']
df9
Country/Region Yemen 28.56 United Kingdom 15.19 Belgium 14.79 Italy 14.26 Name: Deaths / 100 Cases, dtype: float64
# Horizontal bars: one per country in df9, bar length is the df9 value.
plt.barh(y=df9.index, width=df9, color=colors)
plt.show()